## Import data
## Read the monthly COVID table and normalise its column types: `month` becomes
## an ordered factor of English month names, `year` a character label and
## `Confirmed_of_Month` a numeric column.
covid <-
  read_csv("./data/covid_data_monthly.csv") %>%
  mutate(
    # month.name is base R's January..December vector: indexing it by the
    # month number gives the name, and reusing it as the level set fixes the
    # calendar ordering.
    month = factor(
      month.name[as.numeric(month)],
      levels = month.name,
      ordered = TRUE
    ),
    year = as.character(year),
    Confirmed_of_Month = as.numeric(Confirmed_of_Month)
  )
## take a look at covid plot
## Confirmed cases by month, with one coloured line (plus points) per year.
ggplot(covid, aes(x = month, y = Confirmed_of_Month, colour = year)) +
  geom_line(aes(group = year)) +
  geom_point(size = 1.5)

## Read the combined export-volume table; repair the misspelt month label and
## store `month` as an ordered factor in calendar order, `year` as character.
export_volume_df <-
  read_csv("./data/cleaned data/export_volume_combined.csv") %>%
  mutate(
    # The source file spells February as "Feburary"; rename that level first
    # so it is not lost when the levels are restricted to month.name.
    month = recode(as.factor(month), "Feburary" = "February"),
    month = factor(month, levels = month.name, ordered = TRUE),
    year = as.character(year)
  )
## Total export volume per month and year, drawn as one coloured line per year.
export_volume_df %>%
  group_by(month, year) %>%
  summarise(sum_of_export = sum(export_volume)) %>%
  ggplot(aes(x = month, y = sum_of_export, colour = year)) +
  geom_line(aes(group = year)) +
  geom_point(size = 1.5) +
  # Rotate the month labels to vertical.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

## Read the combined import-volume table; repair the misspelt month label and
## store `month` as an ordered factor in calendar order, `year` as character.
import_volume_df <-
  read_csv("./data/cleaned data/import_volume_combined.csv") %>%
  mutate(
    # The source file spells February as "Feburary"; rename that level first
    # so it is not lost when the levels are restricted to month.name.
    month = recode(as.factor(month), "Feburary" = "February"),
    month = factor(month, levels = month.name, ordered = TRUE),
    year = as.character(year)
  )
## Total import volume per month and year, drawn as one coloured line per year.
import_volume_df %>%
  group_by(month, year) %>%
  summarise(sum_of_import = sum(import_volume)) %>%
  ggplot(aes(x = month, y = sum_of_import, colour = year)) +
  geom_line(aes(group = year)) +
  geom_point(size = 1.5) +
  # Rotate the month labels to vertical.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

## Join the COVID, export and import tables on year and month. The COVID table
## is the left-most input, so the left joins keep every COVID row.
## NOTE(review): export and import rows are matched only on year/month, so if
## each table has several rows per month the result holds every export x import
## pairing for that month -- confirm this fan-out is intended.
combined_df <-
  list(covid, export_volume_df, import_volume_df) %>%
  # `all = TRUE` is an argument of base::merge(), not of dplyr::left_join(),
  # so it is not forwarded to the join here.
  reduce(left_join, by = c("year", "month")) %>%
  janitor::clean_names() %>%
  # After clean_names() the join suffixes read `_x` / `_y`; the rename treats
  # `_x` as the export column and `_y` as the import column.
  rename(
    "export_product_type" = "product_type_x",
    "import_product_type" = "product_type_y"
  )
Export volume vs. COVID-19 confirmed cases
## Simple linear regression of export volume on monthly confirmed cases, fitted
## on the joined table; summary() prints the coefficient table.
export_fit <- lm(
  formula = export_volume ~ confirmed_of_month,
  data = combined_df
)
summary(export_fit)
Call:
lm(formula = export_volume ~ confirmed_of_month, data = combined_df)
Residuals:
Min 1Q Median 3Q Max
-1885.0 -1684.6 -1548.5 -417.6 17170.7
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.930e+03 8.917e+01 21.641 <2e-16 ***
confirmed_of_month -1.344e-07 1.230e-07 -1.093 0.274
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 3489 on 4444 degrees of freedom
(2 observations deleted due to missingness)
Multiple R-squared: 0.0002687, Adjusted R-squared: 4.376e-05
F-statistic: 1.195 on 1 and 4444 DF, p-value: 0.2745
## Fit one regression of export volume on confirmed cases and deaths per nested
## group, then stack the tidy coefficient tables with their month and year.
export_df <-
  combined_df %>%
  na.omit() %>%
  # Columns from confirmed_of_month through export_volume are packed into the
  # `data` list-column; the remaining columns define the groups.
  nest(data = confirmed_of_month:export_volume) %>%
  mutate(
    lm_fits = map(data, function(group_data) {
      lm(export_volume ~ confirmed_of_month + deaths_of_month, data = group_data)
    }),
    lm_results = map(lm_fits, broom::tidy)
  ) %>%
  select(month, year, lm_results) %>%
  unnest(lm_results)
## Density of the coefficient estimates from the per-group export fits, after
## dropping rows that contain any NA.
## NOTE(review): `estimate` pools every term row in export_df -- confirm that
## mixing all terms in a single density is intended.
ggplot(na.omit(export_df), aes(x = estimate)) +
  geom_density()

## Scatter plot of summed export volume against confirmed cases (one point per
## month/year/case-count group) with a dashed red linear trend line.
export_plot <-
  combined_df %>%
  group_by(month, year, confirmed_of_month) %>%
  summarize(export_sum = sum(export_volume)) %>%
  ggplot(aes(x = confirmed_of_month, y = export_sum)) +
  geom_point(alpha = 0.5) +
  scale_y_continuous() +
  # Use FALSE rather than the reassignable alias F to switch off the
  # confidence band.
  geom_smooth(se = FALSE, color = "red", method = "lm", size = 1, linetype = 2) +
  labs(
    title = "Covid Cases vs. Export Volume",
    x = "Covid Cases",
    y = "Export Volume"
  )
## Interactive scatter of export volume against confirmed cases, coloured by
## month, with a hover label showing both values.
## NOTE(review): this plots the individual export_volume rows rather than
## per-month sums -- confirm that is intended.
combined_df %>%
  mutate(
    # Case counts are not currency, so the label carries no "$" prefix.
    text_label = str_c(
      "Confirmed Cases: ", confirmed_of_month,
      "\nExport Volume: ", export_volume
    )
  ) %>%
  plot_ly(
    x = ~confirmed_of_month,
    y = ~export_volume,
    type = "scatter",
    mode = "markers",
    color = ~month,
    text = ~text_label,
    alpha = 0.5
  )
Import volume vs. COVID-19 confirmed cases
## Simple linear regression of import volume on monthly confirmed cases, fitted
## on the joined table; summary() prints the coefficient table.
import_fit <- lm(
  formula = import_volume ~ confirmed_of_month,
  data = combined_df
)
summary(import_fit)
Call:
lm(formula = import_volume ~ confirmed_of_month, data = combined_df)
Residuals:
Min 1Q Median 3Q Max
-315.7 -218.6 -167.6 108.7 1011.7
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.265e+02 8.343e+00 39.14 <2e-16 ***
confirmed_of_month 5.060e-09 1.150e-08 0.44 0.66
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 326.4 on 4444 degrees of freedom
(2 observations deleted due to missingness)
Multiple R-squared: 4.352e-05, Adjusted R-squared: -0.0001815
F-statistic: 0.1934 on 1 and 4444 DF, p-value: 0.6601
## Fit one regression of import volume on confirmed cases per nested group,
## then stack the tidy coefficient tables with their month and year.
import_df <-
  combined_df %>%
  # Rows containing any NA are dropped here, before the fits.
  na.omit() %>%
  # Columns from confirmed_of_month through import_volume are packed into the
  # `data` list-column; the remaining columns define the groups.
  nest(data = confirmed_of_month:import_volume) %>%
  mutate(
    # The former `na.action = na.omit()` was an argument to map(), not lm(),
    # and called na.omit() without an object; it is dropped because the NA
    # rows are already removed above.
    lm_fits = map(data, function(group_data) {
      lm(import_volume ~ confirmed_of_month, data = group_data)
    }),
    lm_results = map(lm_fits, broom::tidy)
  ) %>%
  select(month, year, lm_results) %>%
  unnest(lm_results)
## Density of the coefficient estimates from the per-group import fits.
## NOTE(review): `estimate` pools every term row in import_df -- confirm that
## mixing all terms in a single density is intended.
ggplot(import_df, aes(x = estimate)) +
  geom_density()

## Scatter plot of summed import volume against confirmed cases (one point per
## month/year/case-count group) with a dashed red linear trend line.
import_plot <-
  combined_df %>%
  group_by(month, year, confirmed_of_month) %>%
  summarize(import_sum = sum(import_volume)) %>%
  ggplot(aes(x = confirmed_of_month, y = import_sum)) +
  geom_point(alpha = 0.5) +
  scale_y_continuous() +
  # Use FALSE rather than the reassignable alias F to switch off the
  # confidence band.
  geom_smooth(se = FALSE, color = "red", method = "lm", size = 1, linetype = 2) +
  labs(
    title = "Covid Cases vs. Import Volume",
    x = "Covid Cases",
    y = "Import Volume"
  )
## Interactive scatter of summed import volume against confirmed cases, coloured
## by month, with a hover label showing both values.
combined_df %>%
  group_by(month, year, confirmed_of_month) %>%
  summarize(import_sum = sum(import_volume)) %>%
  mutate(
    # Case counts are not currency, so the label carries no "$" prefix.
    text_label = str_c(
      "Confirmed Cases: ", confirmed_of_month,
      "\nImport Volume: ", import_sum
    )
  ) %>%
  plot_ly(
    x = ~confirmed_of_month,
    y = ~import_sum,
    type = "scatter",
    mode = "markers",
    color = ~month,
    text = ~text_label,
    alpha = 0.5
  )
## Arrange the import and export scatter plots together in one figure.
ggpubr::ggarrange(plotlist = list(import_plot, export_plot))
